#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : WeixinSpider.py
# @Author: tongnian
# @Date  : 2018/11/29
# @Desc  :
import requests, threading
import datetime
from bs4 import BeautifulSoup
from SpiderAction.News import *
import emoji


def getWeixinArticleInfo(news_url):
    """Fetch one WeChat (mp.weixin.qq.com) article page and persist it.

    Args:
        news_url: URL of the WeChat article page to scrape.

    Side effects:
        Saves a ``NewsSpider`` document; returns nothing.
    """
    # Timeout so one dead/slow host cannot hang the whole crawl loop.
    web_data = requests.get(news_url, timeout=30)
    web_data.encoding = 'utf-8'
    soup = BeautifulSoup(web_data.text, 'lxml')

    def first_text(selector, fallback):
        # Stripped text of the first node matching `selector`, or `fallback`
        # when the page lacks that element (querying once, not twice).
        nodes = soup.select(selector)
        return nodes[0].text.replace(" ", "") if nodes else fallback

    # Title
    title = first_text('.rich_media_title', "未获取标题")

    # Date: the crawl date, not a date parsed from the page.
    date = datetime.datetime.now().strftime("%Y-%m-%d")

    # Source subscription account; fall back to the raw URL.
    subscription = first_text('.rich_media_meta_nickname a', news_url)
    wechat_number = first_text('.profile_meta_value', "未获取微信号")

    # Body: keep the raw HTML of the content node(s), as the original did.
    content_nodes = soup.select('.rich_media_content')
    article = str(content_nodes) if content_nodes else "未获取正文"
    # Replace emoji with their text aliases so storage stays plain text.
    article = emoji.demojize(article)

    News = NewsSpider(title=title, date=date, sourceName=subscription, sourceUrl="微信号：" + wechat_number,
                      article=article, status="wait", spider_source="sougou_weixin", examine="suspend")

    News.save()


def get_weixin_list():
    """Crawl the Sogou-WeChat hot list and ingest articles published within the last hour.

    For each entry on the listing page, reads its publish timestamp (the
    ``t`` attribute) and calls :func:`getWeixinArticleInfo` on entries less
    than ~1 hour old.
    """
    response = requests.get("http://weixin.sogou.com/pcindex/pc/pc_2/pc_2.html", timeout=30)
    response.encoding = "utf-8"  # force utf-8; the page may not declare it
    soup = BeautifulSoup(response.text, 'lxml')

    # Parallel node lists for each listing entry: timestamp span and link.
    stamps = soup.select('.txt-box .s-p .s2')
    links = soup.select('.txt-box h3 a')
    now = datetime.datetime.now()  # hoisted: one reference time for the whole pass

    for stamp, link in zip(stamps, links):
        news_date = datetime.datetime.fromtimestamp(int(stamp.get('t')))
        age = now - news_date
        print(age)
        # BUG FIX: the original tested `(d2 - d1).seconds < 3599`, but
        # timedelta.seconds discards whole days, so e.g. a 3-day-old article
        # could still pass. total_seconds() is the true age in seconds.
        if age.total_seconds() < 3599:
            url = link.get('href')
            print(url)
            getWeixinArticleInfo(url)
    print("爬虫结束")

# def get_Sougou_News():
#     get_weixin_list()
#     timer = threading.Timer(3600, get_Sougou_News)  # re-run once per hour
#     timer.start()

#
if __name__ == "__main__":
    # Run a single crawl pass; hourly scheduling is handled externally.
    get_weixin_list()